#!/usr/bin/python
#encoding:utf-8
# A simple scraper that fetches basic information from a web page.
from lxml import html
import requests
from fake_useragent import UserAgent
# Build request headers carrying a randomly chosen User-Agent string.
user_agent = UserAgent()
headers = {"user-agent": user_agent.random}
print("headers:", headers)

# Example page URLs on the target site:
# http://www.feijisu5.com/acg/3736/972.html
# http://www.feijisu5.com/acg/3736/971.html
# http://www.feijisu5.com/acg/3736/1.html
url = "http://www.feijisu5.com/acg/3736/1.html"

# Pass the headers so the random User-Agent is actually sent, and set a
# timeout so a stalled server cannot block the script forever.
response = requests.get(url, headers=headers, timeout=10)
print(response.status_code)

# The page is treated as UTF-8. Decode the raw bytes directly instead of
# round-tripping `response.text` through ISO-8859-1: that round trip only
# works when requests guessed ISO-8859-1 and raises UnicodeEncodeError for
# any other detected charset.
html_str = response.content.decode("utf-8")

print(html_str)

# Parse the markup into an lxml element tree rooted at <html>.
etree = html.etree
html_obj = etree.HTML(html_str)
print(html_obj)

